Three independant shp files for moors/wetlands were provided, so here the converted to common crs and combined into a single layer
link <- "/data/Dagobah/greengrass/schnesha/thesis/wetland_masks/DE_wiedervernaesst/Wiedervernaessung_MV_final.shp"
not_all_na <- function(x) any(!is.na(x)) # little helper function to select only cols that contain data
MV <- read_sf(link) |> select(where(not_all_na)) |> select(geometry) |> mutate(rewetted = TRUE)link <- "/data/Dagobah/greengrass/schnesha/thesis/wetland_masks/DE_wiedervernaesst/Wiedervernaessung_BB_final.shp"
BB <- read_sf(link) |> select(where(not_all_na)) |> select(geometry) |> mutate(rewetted = TRUE)link <- "/data/Dagobah/greengrass/schnesha/thesis/wetland_masks/moork/moor_kbk25.shp"
moor <- read_sf(link) |> select(geometry) |> mutate(rewetted = FALSE)link <- "/data/Dagobah/greengrass/schnesha/thesis/wetland_masks/gloez2_moor/gloez2_moo_fk.shp"
BB_moor <- read_sf(link) |> select(geometry) |> st_transform(st_crs(MV)) |> mutate(rewetted = FALSE)wetlands <- rbind(MV, BB) |>
rbind(moor) |>
rbind(BB_moor)
plot(vect(wetlands))# write shp to disk
library(rgdal) ## Loading required package: sp
## Please note that rgdal will be retired by the end of 2023,
## plan transition to sf/stars/terra functions using GDAL and PROJ
## at your earliest convenience.
##
## rgdal: version: 1.5-32, (SVN revision 1176)
## Geospatial Data Abstraction Library extensions to R successfully loaded
## Loaded GDAL runtime: GDAL 3.0.4, released 2020/01/28
## Path to GDAL shared files: /usr/share/gdal
## GDAL binary built with GEOS: TRUE
## Loaded PROJ runtime: Rel. 6.3.1, February 10th, 2020, [PJ_VERSION: 631]
## Path to PROJ shared files: /usr/share/proj
## Linking to sp version:1.5-0
## To mute warnings of possible GDAL/OSR exportToProj4() degradation,
## use options("rgdal_show_exportToProj4_warnings"="none") before loading sp or rgdal.
##
## Attaching package: 'rgdal'
## The following object is masked from 'package:terra':
##
## project
library(rgeos) ## rgeos version: 0.6-1, (SVN revision 692)
## GEOS runtime version: 3.8.0-CAPI-1.13.1
## Please note that rgeos will be retired during 2023,
## plan transition to sf functions using GEOS at your earliest convenience.
## Linking to sp version: 1.5-1
## Polygon checking: TRUE
##
## Attaching package: 'rgeos'
## The following object is masked from 'package:dplyr':
##
## symdiff
st_write(wetlands, "/data/Dagobah/greengrass/schnesha/thesis/wetland_masks/wetland_mask_vector/wetland_mask.shp", delete_layer = TRUE)## Deleting layer `wetland_mask' using driver `ESRI Shapefile'
## Writing layer `wetland_mask' to data source
## `/data/Dagobah/greengrass/schnesha/thesis/wetland_masks/wetland_mask_vector/wetland_mask.shp' using driver `ESRI Shapefile'
## Writing 36439 features with 1 fields and geometry type Unknown (any).
Using the force-cube module the wetland vect mask is converted into cubed tif
wetland_mask <- rast("/data/Dagobah/greengrass/schnesha/thesis/wetland_masks/mosaic/wetland_mask.vrt") #|>
# reproject to match format of landcover (if not done there are artificates in the mask from imprecise reprojection)
#project(rast("/data/Dagobah/greengrass/schnesha/thesis/wetland_masks/DE_cover/classification_map_germany_2020_v02.tif"))
plot(wetland_mask)(source: https://www.mundialis.de/en/deutschland-2020-landbedeckung-auf-basis-von-sentinel-2-daten/)
download.file('https://data.mundialis.de/geodata/lulc-germany/classification_2020/classification_map_germany_2020_v02.tif',
destfile = "/data/Dagobah/greengrass/schnesha/thesis/wetland_masks/DE_cover/classification_map_germany_2020_v02.tif")forest_mask <- rast("/data/Dagobah/greengrass/schnesha/thesis/wetland_masks/DE_cover/classification_map_germany_2020_v02.tif")
#wetland_mask_repojected <- NULL
# Cover classes in classification map
#10: forest
#20: low vegetation
#30: water
#40: built-up
#50: bare soil
#60: agriculture
forest_mask[forest_mask < 11] <- 1
forest_mask[forest_mask > 1] <- NA
forest_mask <- forest_mask |> project(wetland_mask)
plot(forest_mask, col=c("black"))
#forest_mask_buffered <- buffer(forest_mask, width=20, background=NA)
#forest_mask_buffered[isTRUE(forest_mask_buffered)] <- 1
#forest_mask_buffered[isFALSE(forest_mask_buffered)] <- NA
#forest_mask_buffered[!is.na(forest_mask_buffered)] <- 1
#plot(forest_mask_buffered, col=c("black"))
terra::writeRaster(
forest_mask,
filename = paste0("/data/Dagobah/greengrass/schnesha/thesis/wetland_masks/DE_cover/classification_map_germany_2020_v02_forest_mask.tif"),
filetype = 'GTiff',
datatype = 'INT2S',
overwrite = TRUE
)
terra::writeRaster(
forest_mask,
filename = paste0("/data/Dagobah/greengrass/schnesha/thesis/wetland_masks/DE_cover/classification_map_germany_2020_v02_forest_mask_50m_buffer.tif"),
filetype = 'GTiff',
datatype = 'INT2S',
overwrite = TRUE
)wetland_mask_v2 <- terra::mask(wetland_mask, forest_mask, maskvalue=1, updatevalue=NA)
# filter out non 1
#wetland_mask_v2[wetland_mask_v2 < 1] <- NA
plot(wetland_mask_v2, col=c("black","blue"))
terra::writeRaster(
wetland_mask_v2,
filename = paste0("/data/Dagobah/greengrass/schnesha/thesis/wetland_masks/wetland_mask_v2.tif"),
filetype = 'GTiff',
datatype = 'INT2S',
overwrite = TRUE
)wetland_mask <- rast("/data/Dagobah/greengrass/schnesha/thesis/wetland_masks/wetland_mask_v2.tif") #|>
# reproject to match format of landcover (if not done there are artificates in the mask from imprecise reprojection)
# project(rast("/data/Dagobah/greengrass/schnesha/thesis/wetland_masks/Crop_GER_2019/CTM_GER_2019.tif"))
plot(wetland_mask)crop_map <- rast("/data/Dagobah/greengrass/schnesha/thesis/wetland_masks/Crop_GER_2019/CTM_GER_2019.tif") #%>crop(wetland_mask)
#wetland_mask_repojected <- NULL
# Cover classes in classification map
# 10 Grassland
# 31 Winter wheat
# 32 Winter rye
# 33 Winter barley
# 34 Other winter cereal
# 41 Spring barley
# 42 Spring oat
# 43 Other spring cereal
# 50 Winter rapeseed
# 60 Legume
# 70 Sunflower
# 80 Sugar beet
# 91 Maize
# 92 Maize (grain)
# 100 Potato
# 110 Grapevine
# 120 Strawberry
# 130 Asparagus
# 140 Onion
# 150 Hops
# 160 Orchard
# 181 Carrot
# 182 Other vegetables
# 555 Small woody features
# 999 Other agricultural areas
crop_map[crop_map %in% c(31, 32, 33, 34, 41, 42, 43, 50, 60, 70, 80, 91, 92, 100, 110, 120, 130, 140, 150, 160, 181, 182)] <- 1
crop_map[crop_map %in% c(10, 555, 999)] <- 2
crop_map <- crop_map |> project(wetland_mask)
plot(crop_map, col=c("green", "blue"))
terra::writeRaster(
crop_map,
filename = paste0("/data/Dagobah/greengrass/schnesha/thesis/wetland_masks/Crop_GER_2019/CTM_GER_2019_v2.tif"),
filetype = 'GTiff',
datatype = 'INT2S',
overwrite = TRUE
)wetland_mask_v2 <- terra::mask(wetland_mask_v2, crop_map, maskvalue=1, updatevalue=NA)
wetland_mask_v2[!wetland_mask_v2 == 1] <- 0
plot(wetland_mask_v2)
terra::writeRaster(
wetland_mask_v2,
filename = paste0("/data/Dagobah/greengrass/schnesha/thesis/wetland_masks/wetland_mask_v2.tif"),
filetype = 'GTiff',
datatype = 'INT2S',
overwrite = TRUE
)grassland_map <- rast("/data/Dagobah/greengrass/grassland_ger_ls_s2/GL_change_mask/GL_mask.tif")
plot(grassland_map)
grassland_map <- grassland_map |> project(wetland_mask)
wetland_mask_v2 <- terra::mask(wetland_mask_v2, grassland_map, maskvalue=1, updatevalue=NA)
wetland_mask_v2[!wetland_mask_v2 == 1] <-0
plot(wetland_mask_v2, col=c("black"))
terra::writeRaster(
wetland_mask_v2,
# saved as version 3 as original workflow did not mask grasslands
filename = paste0("/data/Dagobah/greengrass/schnesha/thesis/wetland_masks/wetland_mask_v3.tif"),
filetype = 'GTiff',
datatype = 'INT2S',
overwrite = TRUE
)100 random points per feature, with a minimum of 50 m between points and 20 meters buffer to edge of exent. After 5 unsuccessful attempts of sampling a point meeting the criteria the sampling algorithm moved on to the next feature.
random <- read_sf("/data/Dagobah/greengrass/schnesha/thesis/feature_space/sample_points_random/sample_points_random.shp") |>
vect()
plot(random)
print(paste("the random sampling strategy yielded", nrow(random), "sample points."))regular <- read_sf("/data/Dagobah/greengrass/schnesha/thesis/feature_space/sample_points_regular/sample_points_regular.shp") |>
vect()
plot(regular)
print(paste("the regular sampling strategy yielded", nrow(regular), "sample points."))# import updatede wetland mask
wetland_mask <- rast("/data/Dagobah/greengrass/schnesha/thesis/wetland_masks/wetland_mask_v2.tif") #|> crop(ext(4546026, 4676026, 3334920, 3524920))
# There are no methods for negative buffering of rasters in R. This is a workaround that inverts the wetland mask and using an ifelse inverts na values and buffers into the valid raster pixels
wetland_mask <- !buffer(ifel(is.na(wetland_mask), 1, NA),
width=50)
# Replace false values with NA so that they can me omitted in the sampling process
wetland_mask <- subst(wetland_mask, FALSE, NA)
# Sample points on a 200x200 m regular grid
samples <- spatSample(wetland_mask,
size = size(wetland_mask)/200, # get total size and divide to reduce to 200x200m
method = "regular",
as.points =T,
exhaustive=T, na.rm=TRUE)
plot(samples)grid <- read_sf("/data/Dagobah/dc/deu/vector/datacube-grid_DEU_10km.gpkg") |>
st_transform(crs = st_crs(samples)) |>
vect()
plot(grid)regular <- terra::intersect(grid, samples) |>
select(-Tile_X, -Tile_Y, -wetland_mask) |>
mutate(ID = row_number())
plot(regular)# Extract rewetting information
#library(sp)
#library(rgeos)
#wetland_buffered <- gBuffer(wetlands, byid=TRUE, width=0)
#samples <- terra::intersect(project(vect(wetlands), samples),
# samples) # Convert to a data frame and rename cols
# regular <- as.data.frame(samples) |>
# select(-wetland_mask) |>
# mutate(ID = row_number())# as.data.frame(regular) %>%
#
# ggplot(aes(x=Tile_ID)) +
# geom_bar() +
# theme_minimal() # create list of all tile folders
tile_folders <- list.dirs("/data/Dagobah/greengrass/schnesha/thesis/feature_space/TS_S2_17-22", full.names = T)
tile_folders <- tile_folders[grepl("X", tile_folders)]
sample_points_df <- c()
for (i in 1:length(tile_folders)) {
# list bands in tile (with full path) and stack
band_files <- list.files(tile_folders[i], full.names = T)
band_files <- band_files[!grepl("aux.xml", band_files)]
# select files with correct doy
band_files <- band_files[grepl("091-320", band_files)]
# read in all band for the tile as a raster stack
S2_stack <- rast(band_files)
# get the number of layers (per band)
nlayers = nlyr(S2_stack)/12
# add unique name to each band
names(S2_stack) <- paste0(names(S2_stack), "_", c(rep("BLU",nlayers) , rep("GRN",nlayers), rep("NDT",nlayers), rep("NDV",nlayers), rep("NIR",nlayers), rep("RE1",nlayers), rep("RE2",nlayers), rep("RE3",nlayers), rep("RED",nlayers), rep("SW1",nlayers), rep("SW2",nlayers), rep("TCW",nlayers)))
# current tile
current_tile <- band_files[1] |> str_sub(68,78)
# subset sample points to current tile
filtered_samples <- regular |>
filter(Tile_ID == current_tile) |>
# Reproject to match crs of DC
project(S2_stack)
# extract S2 for sample points and convert to df
sample_points <- terra::extract(S2_stack, filtered_samples, xy=T, bind=T, ID=T) |>
as.data.frame()
# bind to total df
sample_points_df <- plyr::rbind.fill(sample_points_df, as.data.frame(sample_points))
# Progress notifier
print(paste("tile", current_tile, "done", "(", i, "out of ", length(tile_folders), ")"))
}#saveRDS(sample_points_df, "/data/Dagobah/greengrass/schnesha/thesis/feature_space/sample_points_df_regular.rds")
#sample_points_df <- readRDS( file = "/data/Dagobah/greengrass/schnesha/thesis/feature_space/sample_points_df_regular.rds")sample_points_long <- sample_points_df |>
# ensure the correct order of variables before pivot longer
select(Tile_ID, x, y, everything()) |>
pivot_longer(cols = c(5:(ncol(sample_points_df))), names_to= "source", values_to = "reflectance") |>
na.omit() |>
# slice_sample(n = 10000) |>
mutate(date = as.Date(str_sub(source, 1, 9),format = "%Y%m%d"),
month = lubridate::month(date),
year = lubridate::year(date),
month_year = paste0(month,"_",year),
doy = lubridate::yday(date),
sensor = str_sub(source, 10, 14),
band = str_sub(source, 16, 19),
band = str_replace(band, "NDV", "NDVI"),
band = str_replace(band, "NDT", "NDTI"),
reflectance = case_when(band == 'NDTI' | band == 'NDVI' ~ reflectance/10000,
band != 'NDTI' | band != 'NDVI' ~ reflectance/100)) |>
# filter(reflectance>0,
# reflectance<100) |>
na.omit()sample_points_wide <- sample_points_long %>%
# need to remove source so that so that bands can be aggregated into a single row (all other cols have to match)
select(-source) |>
distinct(x, y, date, band, .keep_all=T)
# create unique ID for each each point at each time (so that multiple bands for the same location&day have the same ID)
sample_points_wide <- sample_points_wide %>%
mutate(UID = group_indices(sample_points_wide, .dots=c("x", "y", "date"))) |>
tidyr::spread(band, reflectance) |>
select(ID, Tile_ID, date, month, year, month_year, doy, sensor, BLU, GRN, NIR, RE1, RE2, RE3, RED, SW1, SW2, NDTI, NDVI, TCW, everything()) |>
filter(across(BLU:SW2, ~ . > 0),
across(BLU:SW2, ~ . < 100),
NDTI > 0,
NDTI < 1,
NDVI > -1,
NDVI < 1) |>
mutate(SWIR_ratio = (SW2/SW1),
NDWI = (GRN-NIR)/(GRN+NIR),
MNDWI = (GRN-SW1)/(GRN+SW1)) |>
filter(SWIR_ratio<10) |>
filter_all(all_vars(!is.infinite(.))) |>
# mutate_if(is.numeric, list(~na_if(., Inf))) %>%
# mutate_if(is.numeric, list(~na_if(., -Inf))) |>
na.omit()# Full data set
#saveRDS(sample_points_wide, "/data/Dagobah/greengrass/schnesha/thesis/feature_space/sample_points_df_regular_wide.rds")
#sample_points_wide <- readRDS( file = "/data/Dagobah/greengrass/schnesha/thesis/feature_space/sample_points_df_regular_wide.rds") %>% slice_sample(n=10000000)
# reduced data set
#saveRDS(slice_sample(n = 10000000, sample_points_wide), "/data/Dagobah/greengrass/schnesha/thesis/feature_space/sample_points_df_regular_wide_reduced.rds")
sample_points_wide <- readRDS( file = "/data/Dagobah/greengrass/schnesha/thesis/feature_space/sample_points_df_regular_wide_reduced.rds") %>% slice_sample(n=1000000)sample_points_wide <- sample_points_wide |>
rowwise() |>
mutate(mean_reflectance = mean(c(BLU:SW2))) |>
mutate(across(BLU:SW2, ~ ((.x)/mean_reflectance)),
NDVI = (NIR-RED)/(NIR+RED),
SWIR_ratio = (SW2/SW1),
NDWI = (GRN-NIR)/(GRN+NIR),
MNDWI = (GRN-SW1)/(GRN+SW1)) |>
select(-mean_reflectance)xy <- sample_points_wide |> select(x,y)
library(sp)
sample_points_wide_vect <- SpatialPointsDataFrame(coords = xy, data = sample_points_wide,
proj4string = CRS("+proj=laea +lat_0=52 +lon_0=10 +x_0=4321000 +y_0=3210000 +ellps=GRS80 +towgs84=0,0,0,0,0,0,0 +units=m +no_defs +type=crs")) |>
vect()
#project( "epsg:3857")
#project( "epsg:4326")
writeVector(sample_points_wide_vect, "/data/Dagobah/greengrass/schnesha/thesis/feature_space/sample_points_wide_vect.gpkg", overwrite=TRUE)CSO <- rast("/data/Dagobah/greengrass/schnesha/thesis/02_CSO/mosaic/2017-2022_091-320-12_HL_CSO_SEN2L_NUM.vrt")
CSO_AVG <- rast("/data/Dagobah/greengrass/schnesha/thesis/02_CSO/mosaic/2017-2022_091-320-12_HL_CSO_SEN2L_AVG.vrt")
# Average observation per year
global(CSO, c("mean"), na.rm=TRUE)
# average days between observations
global(CSO_AVG, c("mean"), na.rm=TRUE)
# Map of number of observations per year
plot(CSO,
legend=T, col = viridis(n=100,option="D"), range=c(0,40)
)
# Map of average number of days between observations
plot(CSO_AVG,
legend=T, col = viridis(n=100,option="D"),
range=c(0,30)
)
# Aggregating days between observations
CSO_AVG_aggregated <- mean(CSO_AVG)
# map of average number of days between observation
plot(CSO_AVG_aggregated,
legend=T, col = viridis(n=100,option="D"),
range=c(0,30)
)
writeRaster(CSO_AVG_aggregated,
"/data/Dagobah/greengrass/schnesha/thesis/02_CSO/CSO_AVG_aggregated.gpkg",
filetype="GPKG",
options = NULL,
overwrite=TRUE)
terra::writeRaster(
CSO_AVG_aggregated,
filename = "/data/Dagobah/greengrass/schnesha/thesis/02_CSO/CSO_AVG_aggregated.gpkg",
filetype = 'GTiff',
datatype = 'INT2S',
overwrite = TRUE
)
# Average number of days between observations
global(CSO_AVG_aggregated, c("mean"), na.rm=TRUE)drought_index <- rast("/data/Dagobah/greengrass/schnesha/thesis/DWD/drought_index_stack.tif")
month_year <- paste0(c(1:12), "_", rep(c(2017:2022),each=12))
names(drought_index) <- month_year
plot(drought_index)
drought_index_df <- as.data.frame(drought_index) |>
pivot_longer(everything(), names_to = "month_year") |>
filter(month_year %in% c(paste0(c(4:10), "_", rep(c(2017:2022),each=12)))) |>
slice_sample(n=20000) |>
separate(month_year, into = c("month", "year"), sep = "_") %>%
mutate(date = paste(year, month, "01", sep = "-"),
date = lubridate::ymd(date))
ggplot(drought_index_df, aes(date, value, color=year)) +
stat_smooth(method = "loess") +
#stat_summary(fun=mean, geom="line", size = 1.5) + # draw a mean line in the data
#geom_line(FC_df, mapping= aes(date, value/100, group = Tile_ID), alpha=0.8) +
scale_colour_viridis_d(option = "D") +
theme_minimal() +
facet_wrap(~year, scales="free_x")precipitation <- rast("/data/Dagobah/greengrass/schnesha/thesis/DWD/precipitation_stack.tif")
names(precipitation) <- month_year
plot(precipitation)
precipitation_df <- as.data.frame(precipitation) |>
pivot_longer(everything(), names_to = "month_year") |>
filter(month_year %in% c(paste0(c(4:10), "_", rep(c(2017:2022),each=12)))) |>
slice_sample(n=20000) |>
separate(month_year, into = c("month", "year"), sep = "_") %>%
mutate(date = paste(year, month, "01", sep = "-"),
date = lubridate::ymd(date))
ggplot(precipitation_df, aes(date, value, color=year)) +
geom_line(aes(color=..y..), stat="smooth", method = "loess", size=2, alpha=0.9) +
#stat_smooth(method = "loess" ) +
#stat_summary(fun=mean, geom="line", size = 1.5) + # draw a mean line in the data
#geom_line(FC_df, mapping= aes(date, value/100, group = Tile_ID), alpha=0.8) +
scale_colour_viridis_c(option = "D") +
# scale_colour_gradient2(low = "darkblue", mid = "yellow" , high = "red",
# midpoint=median(precipitation_df$value)) +
theme_minimal() +
facet_wrap(~year, scales="free_x")ggplot(precipitation_df, aes(date, value)) +
stat_smooth(drought_index_df, mapping=aes(date, value*10, colour=..y..), method = "loess", size=2) +
stat_smooth(method = "loess" , color="black") +
# stat_summary(aes(color=value),fun=mean, geom="line", size = 1.5) + # draw a mean line in the data
#geom_line(FC_df, mapping= aes(date, value/100, group = Tile_ID), alpha=0.8) +
scale_colour_viridis_c(option = "D", direction = -1) +
theme_minimal() +
theme(legend.position="none") +
scale_y_continuous(
# Features of the first axis
name = "Precipitation",
# Add a second axis and specify its features
sec.axis = sec_axis(~./10, name="drought index")
) +
facet_wrap(~year, scales="free_x")observation dates
sample_points_wide |>
group_by(date) |>
tally() %>%
ggplot(aes(date, n)) +
geom_point(aes(size=n)) +
theme_classic()sample_points_wide |>
group_by(date, year) |>
tally() |>
ungroup() %>%
ggplot(aes(as.factor(year), n, fill=year)) +
geom_violin() +
scale_fill_viridis_c(option = "D") +
theme_classic()observations <- sample_points_wide |>
group_by(month_year, month, year) |>
summarise(count = n()) |>
mutate(month_year = as.factor(month_year),
# month = as.factor(month),
year = as.factor(year))## `summarise()` has grouped output by 'month_year', 'month'. You can override
## using the `.groups` argument.
ggplot(observations, aes(year, count, color = year, fill=month)) +
geom_bar(stat='identity', color="black") +
#scale_colour_viridis_d(option = "D") +
scale_fill_viridis_c(option = "D") +
theme_minimal()sample_points_wide %>%
group_by(month_year, month, year, Tile_ID) %>%
summarise(count = n()) %>%
mutate(month_year = as.factor(month_year),
year = as.factor(year)) %>%
ggplot(aes(year, count, color = year, fill=month)) +
geom_bar(stat='identity', color="black") +
#scale_colour_viridis_d(option = "D") +
scale_fill_viridis_c(option = "D") +
theme_minimal() +
facet_wrap(~Tile_ID)## `summarise()` has grouped output by 'month_year', 'month', 'year'. You can
## override using the `.groups` argument.
ggplot(sample_points_wide, aes(doy, NDVI, color=year, group=year)) +
geom_smooth() +
scale_colour_viridis_c(option = "D") +
theme_minimal()## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
ggplot(sample_points_wide, aes(doy, NDTI, color=year, group=year)) +
geom_smooth() +
scale_colour_viridis_c(option = "D") +
theme_minimal()ggplot(na.omit(sample_points_wide), aes(doy, MNDWI, color=year, group=year)) +
geom_smooth() +
scale_colour_viridis_c(option = "D") +
theme_minimal()MNDWI time series for 2017 looks suspiciously flat. But after double checking it looks like values are there and are artifact free and that the mean seems relatively stable across the time serious. There are some variations across the smoothed trend line, but these may just be not visible in the plot abouve with larger axis scales.
ggplot(sample_points_wide, aes(doy, MNDWI, color=year, group=year)) +
# geom_smooth() +
geom_point() +
scale_colour_viridis_c(option = "D") +
theme_minimal() +
facet_wrap(~year)
sample_points_wide |>
filter(year==2017) |>
group_by(doy) |>
summarise(mean = mean(MNDWI)) |>
ggplot(aes(x=doy, y=mean)) +
geom_line() +
# geom_smooth() +
theme_minimal()# No dissagregation
ggplot(sample_points_wide, aes(NDVI, SWIR_ratio)) +
stat_bin_hex(bins = 200) + #geom_point(data=reference_spectra, aes(NDVI, SWIR_ratio), color ="red") +
scale_fill_continuous(type = "viridis") +
theme_minimal() ggplot(sample_points_wide, aes(NDVI, SWIR_ratio)) +
stat_bin_hex(bins = 200) +
scale_fill_continuous(type = "viridis") +
theme_minimal() +
facet_wrap(~sensor)ggplot(sample_points_wide, aes(NDVI, SWIR_ratio)) +
stat_bin_hex(bins = 100) +
scale_fill_continuous(type = "viridis") +
theme_minimal() +
facet_wrap(~month)ggplot(sample_points_wide, aes(NDVI, SWIR_ratio)) +
stat_bin_hex(bins = 100) +
scale_fill_continuous(type = "viridis") +
theme_minimal() +
facet_wrap(~year)ggplot(sample_points_wide, aes(NDVI, SWIR_ratio)) +
stat_bin_hex(bins = 100) +
scale_fill_continuous(type = "viridis") +
theme_minimal() +
facet_grid(rows= vars(year), cols = vars(month))ggplot(sample_points_wide, aes(NDVI, SWIR_ratio)) +
stat_bin_hex(bins = 100) +
scale_fill_continuous(type = "viridis") +
theme_minimal() +
scale_x_continuous(expand = c(0, 0), limits = c(-0.2, 1)) +
scale_y_continuous(expand = c(0, 0), limits = c(0, 1.2)) +
facet_wrap(~Tile_ID)## Warning: Removed 4119 rows containing non-finite values (`stat_binhex()`).
## Warning: Removed 14 rows containing missing values (`geom_hex()`).
ggplot(sample_points_wide, aes(NDVI, SWIR_ratio, color=MNDWI)) +
geom_point() +
scale_color_continuous(type = "viridis") +
theme_minimal() ggplot(sample_points_wide, aes(NDVI, SWIR_ratio, color=MNDWI)) +
geom_point() +
scale_color_continuous(type = "viridis") +
theme_minimal() +
facet_grid(rows= vars(year), cols = vars(month))ggplot(sample_points_wide, aes(NDVI, MNDWI, color=SWIR_ratio)) +
geom_point() +
scale_color_continuous(type = "viridis") +
theme_minimal()ggplot(sample_points_wide, aes(MNDWI, SWIR_ratio, color=NDVI)) +
geom_point() +
scale_color_continuous(type = "viridis") +
theme_minimal()library(plotly)
sample_points_wide %>%
plot_ly(x=~NDVI, y=~SWIR_ratio, type="scatter", mode="markers", color=~MNDWI, text = ~sample_points_wide$ID) library(plotly)
sample_points_wide %>%
plot_ly(x=~NDVI, y=~MNDWI, z=~SWIR_ratio, type="scatter3d", mode="markers", color=~NDVI) |>
add_trace(data =endmembers, x=~NDVI, y=~MNDWI, z=~SWIR_ratio, name = 'endmembers', mode = 'markers', color="red")Overall I was not able to find reliable consistent trends between points in different regions of the feature space. Below are some observations, but they are not super consistent or reliable.
Other Visual observation in QGIS
Here I used a simple rules based filtering approach to isolate potential endmembers, and scope how endmemebers points distribute in the feature space
First I used a looser set of rules to inlcude more endmembers. These were visually assesed in QGIS to determine if any visual trends could be identified.
PV <- sample_points_wide |>
filter(NDVI> 0.94 & SWIR_ratio<0.4 & MNDWI < -0.5) |>
mutate(class = "PV")
NPV <- sample_points_wide |>
filter(NDVI< 0.32 & SWIR_ratio<0.65 & MNDWI < -0.5) |>
mutate(class = "NPV")
soil <- sample_points_wide |>
filter(NDVI< 0.32 & SWIR_ratio>0.9 & MNDWI < -0.4)|>
mutate(class = "soil")
water <- sample_points_wide |>
filter(NDVI< 0.45 & SWIR_ratio>0.75 & MNDWI > 0.7)|>
mutate(class = "water")
endmembers_loose <- rbind(PV, NPV, soil, water)
endmembers_loose_long <- endmembers_loose |>
pivot_longer(cols = c(9:19, 23,24,25), names_to= "wavelength", values_to = "reflectance")Following the visual assement of the looser selection, I created a stricter set of rules with few final endmembers. Notable I manually seleced some “real” wetland PV points, are based on the pervious numerically filtering only Forest or arg. plots were selected for PV.
PV <- sample_points_wide |>
filter(NDVI> 0.965 & SWIR_ratio>0.375 & SWIR_ratio<0.39 & MNDWI > -0.75 & MNDWI < -0.55 ) |>
mutate(class = "PV")
NPV <- sample_points_wide |>
filter(NDVI> 0.18 & NDVI< 0.21 & SWIR_ratio<0.65 & SWIR_ratio>0.55 & MNDWI < -0.45) |>
mutate(class = "NPV")
soil <- sample_points_wide |>
filter(NDVI< 0.275 & NDVI> 0.23 & SWIR_ratio>0.975 & MNDWI < -0.6)|>
mutate(class = "soil")
water <- sample_points_wide |>
filter(NDVI< -0.45 & NDVI> -0.6 & MNDWI > 0.88 & SWIR_ratio<0.75 & SWIR_ratio>0.5)|>
mutate(class = "water")
### Create endmember Dfs
endmembers <- rbind(PV, NPV, soil, water)
endmembers_long <- endmembers |>
pivot_longer(cols = c(9:19, 23,24,25), names_to= "wavelength", values_to = "reflectance")
endmember_endpoints <- endmembers[!duplicated(endmembers$class), ]PV <- sample_points_wide |>
filter(NDVI> 0.965 & SWIR_ratio<0.39 & TCW > -5 & NIR>35) |>
mutate(class = "PV")
NPV <- sample_points_wide |>
filter(NDVI> 0.25 & NDVI< 0.32 & SWIR_ratio<0.6 & SWIR_ratio>0.55 & TCW < -28.5 & TCW > -35 ) |>
mutate(class = "NPV")
soil <- sample_points_wide |>
filter(NDVI< 0.3 & NDVI> 0.17 & SWIR_ratio>0.98 & TCW < -26 & TCW > -33 )|>
mutate(class = "soil")
water <- sample_points_wide |>
filter(NDVI< -0.9 & SWIR_ratio>0.9 & TCW >-1 & TCW <4 & NDWI >0.75 & MNDWI >0.75)|>
mutate(class = "water")
### Create endmember Dfs
endmembers <- rbind(PV, NPV, soil, water)
endmembers_long <- endmembers |>
pivot_longer(cols = c("BLU","GRN","RED","RE1","RE2","RE3","NIR","SW1","SW2","NDVI","SWIR_ratio", "NDWI","MNDWI","TCW"), names_to= "wavelength", values_to = "reflectance")
endmember_endpoints <- endmembers[!duplicated(endmembers$class), ]#saveRDS(endmembers, file = "/data/Dagobah/greengrass/schnesha/thesis/feature_space/endmembers.rds")
endmembers <- readRDS(file = "/data/Dagobah/greengrass/schnesha/thesis/feature_space/endmembers.rds")
endmembers_long <- endmembers |>
pivot_longer(cols = c("BLU","GRN","RED","RE1","RE2","RE3","NIR","SW1","SW2","NDVI","SWIR_ratio", "NDWI","MNDWI","TCW"), names_to= "wavelength", values_to = "reflectance")
endmember_endpoints <- endmembers[!duplicated(endmembers$class), ]end_members_spec <- endmembers_long |>
mutate(wavelength_num = case_when(wavelength == "BLU" ~ 490,
wavelength == "GRN" ~ 560,
wavelength == "RED" ~ 665,
wavelength == "RE1" ~ 705,
wavelength == "RE2" ~ 740,
wavelength == "RE3" ~ 783,
wavelength == "NIR" ~ 842,
wavelength == "SW1" ~ 1610,
wavelength == "SW2" ~ 2190)) |>
na.omit()
ggplot(end_members_spec, aes(wavelength_num, reflectance)) +
geom_line(end_members_spec, mapping= aes(wavelength_num, reflectance, group=ID), color="grey", alpha=0.5) +
stat_summary(aes(group=class, color=class), fun=mean, geom="line", size = 1.5) + # draw a mean line in the data
#geom_line(aes(wavelength_num, reflectance, group = ID), alpha=0.8) +
scale_color_manual(values=c("peru", "olivedrab", "darkred", "steelblue")) +
theme_minimal()## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
ggplot(endmembers, aes(NDVI, SWIR_ratio, fill =class, color=class)) +
geom_point() +
scale_color_manual(values=c("peru", "olivedrab", "darkred", "steelblue")) +
theme_minimal() Although on the feature space there is overlapp between NPV and water, in the plot below we can see that they still drastically differ from each other when looking at spectral properties such as MNDWI, and red, RE and SW bands
endmembers_long |>
mutate(wavelength = factor(wavelength, levels=c("BLU","GRN","RED","RE1","RE2","RE3","NIR","SW1","SW2","NDVI","SWIR_ratio", "NDWI","MNDWI","TCW"))) %>%
ggplot(aes(x=reflectance, fill=class)) +
geom_density() +
scale_color_manual(values=c("peru", "olivedrab", "darkred", "steelblue")) +
scale_fill_manual(values=c("peru", "olivedrab", "darkred", "steelblue")) +
theme_minimal() +
theme(legend.position="bottom") +
facet_wrap(vars(wavelength), scales = "free", shrink=T, ncol = 5)ggplot(endmembers, aes(x=month, fill=class)) +
geom_bar() +
scale_color_manual(values=c("peru", "olivedrab", "darkred", "steelblue")) +
scale_fill_manual(values=c("peru", "olivedrab", "darkred", "steelblue")) +
theme_minimal()ggplot(endmembers, aes(x=month, fill=class)) +
geom_density(alpha=.7) +
scale_color_manual(values=c("peru", "olivedrab", "darkred", "steelblue")) +
scale_fill_manual(values=c("peru", "olivedrab", "darkred", "steelblue")) +
theme_minimal()ggplot(endmembers, aes(x=year, fill=class)) +
geom_bar() +
scale_color_manual(values=c("peru", "olivedrab", "darkred", "steelblue")) +
scale_fill_manual(values=c("peru", "olivedrab", "darkred", "steelblue")) +
theme_minimal()ggplot(endmembers, aes(x=month, fill=class)) +
geom_bar() +
scale_color_manual(values=c("peru", "olivedrab", "darkred", "steelblue")) +
scale_fill_manual(values=c("peru", "olivedrab", "darkred", "steelblue")) +
theme_minimal() +
facet_wrap(vars(year))ggplot(slice_sample(sample_points_wide, n = 250), aes(NDVI, SWIR_ratio, color=MNDWI)) +
#geom_point() +
#geom_point(data=endmembers[!endmembers$class=="water",], aes(NDVI, SWIR_ratio, shape=class), color ="red") +
#geom_point(data=endmembers, aes(NDVI, SWIR_ratio, shape=class), color ="red") +
# geom_polygon(data=endmember_endpoints[-c(1),], aes(NDVI, SWIR_ratio), color ="red", fill="red", alpha=0.1) +
# geom_polygon(data=endmember_endpoints[-c(2),], aes(NDVI, SWIR_ratio), color ="red", fill="grey", alpha=0.6) +
# geom_polygon(data=endmember_endpoints[-c(3),], aes(NDVI, SWIR_ratio), color ="red", fill="red", alpha=0.1) +
geom_polygon(data=endmember_endpoints[-c(4),], aes(NDVI, SWIR_ratio), color ="black", alpha=0.1) +
scale_fill_continuous(type = "viridis", limits=c(2,2000), na.value = "white") +
theme_minimal() side_view <- ggplot(slice_sample(sample_points_wide, n = 250000), aes(NDVI, SWIR_ratio, color=MNDWI)) +
geom_point() +
#geom_point(data=endmembers[!endmembers$class=="water",], aes(NDVI, SWIR_ratio, shape=class), color ="red") +
geom_point(data=endmembers, aes(NDVI, SWIR_ratio, shape=class), color ="red") +
geom_polygon(data=endmember_endpoints[-c(1),], aes(NDVI, SWIR_ratio), color ="red", fill="red", alpha=0.1) +
geom_polygon(data=endmember_endpoints[-c(2),], aes(NDVI, SWIR_ratio), color ="red", fill="red", alpha=0.1) +
geom_polygon(data=endmember_endpoints[-c(3),], aes(NDVI, SWIR_ratio), color ="red", fill="red", alpha=0.1) +
geom_polygon(data=endmember_endpoints[-c(4),], aes(NDVI, SWIR_ratio), color ="red", fill="red", alpha=0.1) +
scale_fill_continuous(type = "viridis", limits=c(2,2000), na.value = "white") +
theme_minimal()
ggMarginal(side_view, type = "densigram", fill="grey")## Warning: The dot-dot notation (`..density..`) was deprecated in ggplot2 3.4.0.
## ℹ Please use `after_stat(density)` instead.
## ℹ The deprecated feature was likely used in the ggExtra package.
## Please report the issue at <https://github.com/daattali/ggExtra/issues>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
ggplot(sample_points_wide, aes(NDVI, SWIR_ratio)) +
geom_bin_2d(bins = 200) +
geom_point(data=endmembers, aes(NDVI, SWIR_ratio, color =class),size=2) +
scale_color_manual(values=c("peru", "olivedrab", "darkred", "steelblue")) +
# geom_polygon(data=endmember_endpoints[-c(1),], aes(NDVI, SWIR_ratio), color ="red", fill="red", alpha=0.1) +
# geom_polygon(data=endmember_endpoints[-c(2),], aes(NDVI, SWIR_ratio), color ="red", fill="red", alpha=0.1) +
# geom_polygon(data=endmember_endpoints[-c(3),], aes(NDVI, SWIR_ratio), color ="red", fill="red", alpha=0.1) +
# geom_polygon(data=endmember_endpoints[-c(4),], aes(NDVI, SWIR_ratio), color ="red", fill="red", alpha=0.1) +
scale_fill_continuous(type = "viridis", limits=c(2,2000), na.value = "white") +
theme_minimal()Higher NDVI outlyers at MNDWI edge. Mostly Intensive crop which exhibit no patchiness
Postive MNDWI points with higher than expected NVDI values. Low water level/water edge points with wetland vegetation cover. Most ( = >80%) come from two locations (Penne Esturary and a small lake)
top_view <- ggplot(slice_sample(sample_points_wide, n = 250000), aes(NDVI, MNDWI, color=SWIR_ratio)) +
geom_point() +
geom_point(data=endmembers[!endmembers$class=="water",], aes(NDVI, MNDWI, shape=class), color ="red") +
#geom_point(data=endmembers, aes(NDVI, SWIR_ratio, shape=class, color =class),size=2) +
geom_polygon(data=endmember_endpoints[-c(1),], aes(NDVI, MNDWI), color ="red", fill="red", alpha=0.1) +
geom_polygon(data=endmember_endpoints[-c(2),], aes(NDVI, MNDWI), color ="red", fill="red", alpha=0.1) +
geom_polygon(data=endmember_endpoints[-c(3),], aes(NDVI, MNDWI), color ="red", fill="red", alpha=0.1) +
geom_polygon(data=endmember_endpoints[-c(4),], aes(NDVI, MNDWI), color ="red", fill="red", alpha=0.1) +
scale_fill_continuous(type = "viridis", limits=c(2,2000), na.value = "white") +
theme_minimal()
ggMarginal(top_view, type = "densigram", fill="grey")ggplot(sample_points_wide, aes(NDVI, MNDWI)) +
geom_bin_2d(bins = 200) +
geom_point(data=endmembers, aes(NDVI, MNDWI, color =class), alpha=0.9, size=2) +
scale_color_manual(values=c("peru", "olivedrab", "darkred", "steelblue")) +
geom_polygon(data=endmember_endpoints[-c(1),], aes(NDVI, MNDWI), color ="red", fill="red", alpha=0.1) +
geom_polygon(data=endmember_endpoints[-c(2),], aes(NDVI, MNDWI), color ="red", fill="red", alpha=0.1) +
geom_polygon(data=endmember_endpoints[-c(3),], aes(NDVI, MNDWI), color ="red", fill="red", alpha=0.1) +
geom_polygon(data=endmember_endpoints[-c(4),], aes(NDVI, MNDWI), color ="red", fill="red", alpha=0.1) +
scale_fill_continuous(type = "viridis", limits=c(2,2000), na.value = "white") +
theme_minimal()ggplot(sample_points_wide, aes(NDVI, NDWI)) +
geom_bin_2d(bins = 200) +
geom_point(data=endmembers, aes(NDVI, NDWI, color =class), size=2, alpha=0.9) +
scale_color_manual(values=c("peru", "olivedrab", "darkred", "steelblue")) +
geom_polygon(data=endmember_endpoints[-c(1),], aes(NDVI, NDWI), color ="red", fill="red", alpha=0.1) +
geom_polygon(data=endmember_endpoints[-c(2),], aes(NDVI, NDWI), color ="red", fill="red", alpha=0.1) +
geom_polygon(data=endmember_endpoints[-c(3),], aes(NDVI, NDWI), color ="red", fill="red", alpha=0.1) +
geom_polygon(data=endmember_endpoints[-c(4),], aes(NDVI, NDWI), color ="red", fill="red", alpha=0.1) +
scale_fill_continuous(type = "viridis", limits=c(2,2000), na.value = "white") +
theme_minimal()ggplot(sample_points_wide, aes(NDVI, TCW)) +
geom_bin_2d(bins = 200) +
geom_point(data=endmembers, aes(NDVI, TCW, color =class), size=2, alpha=0.9) +
scale_color_manual(values=c("peru", "olivedrab", "darkred", "steelblue")) +
# geom_polygon(data=endmember_endpoints[-c(1),], aes(NDVI, TCW), color ="red", fill="red", alpha=0.1) +
# geom_polygon(data=endmember_endpoints[-c(2),], aes(NDVI, TCW), color ="red", fill="red", alpha=0.1) +
# geom_polygon(data=endmember_endpoints[-c(3),], aes(NDVI, TCW), color ="red", fill="red", alpha=0.1) +
# geom_polygon(data=endmember_endpoints[-c(4),], aes(NDVI, TCW), color ="red", fill="red", alpha=0.1) +
scale_fill_continuous(type = "viridis", limits=c(2,2000), na.value = "white") +
theme_minimal()end_view <- ggplot(slice_sample(sample_points_wide, n = 250000), aes(MNDWI, SWIR_ratio, color=NDVI)) +
geom_point() +
geom_point(data=endmembers[!endmembers$class=="water",], aes(MNDWI, SWIR_ratio, shape=class), color ="red") +
#geom_point(data=endmembers, aes(NDVI, SWIR_ratio, shape=class, color =class), size=2, alpha=0.9) +
geom_polygon(data=endmember_endpoints[-c(1),], aes(MNDWI, SWIR_ratio), color ="red", fill="red", alpha=0.1) +
geom_polygon(data=endmember_endpoints[-c(2),], aes(MNDWI, SWIR_ratio), color ="red", fill="red", alpha=0.1) +
geom_polygon(data=endmember_endpoints[-c(3),], aes(MNDWI, SWIR_ratio), color ="red", fill="red", alpha=0.1) +
geom_polygon(data=endmember_endpoints[-c(4),], aes(MNDWI, SWIR_ratio), color ="red", fill="red", alpha=0.1) +
scale_fill_continuous(type = "viridis", limits=c(2,2000), na.value = "white") +
theme_minimal()
ggMarginal(end_view, type = "densigram", fill="grey")## Warning: The dot-dot notation (`..density..`) was deprecated in ggplot2 3.4.0.
## ℹ Please use `after_stat(density)` instead.
## ℹ The deprecated feature was likely used in the ggExtra package.
## Please report the issue at <https://github.com/daattali/ggExtra/issues>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
ggplot(sample_points_wide, aes(MNDWI, SWIR_ratio)) +
geom_bin_2d(bins = 200) +
geom_point(data=endmembers, aes(MNDWI, SWIR_ratio, color =class), size=2, alpha=0.9) +
scale_color_manual(values=c("peru", "olivedrab", "darkred", "steelblue")) +
geom_polygon(data=endmember_endpoints[-c(1),], aes(MNDWI, SWIR_ratio), color ="red", fill="red", alpha=0.1) +
geom_polygon(data=endmember_endpoints[-c(2),], aes(MNDWI, SWIR_ratio), color ="red", fill="red", alpha=0.1) +
geom_polygon(data=endmember_endpoints[-c(3),], aes(MNDWI, SWIR_ratio), color ="red", fill="red", alpha=0.1) +
geom_polygon(data=endmember_endpoints[-c(4),], aes(MNDWI, SWIR_ratio), color ="red", fill="red", alpha=0.1) +
scale_fill_continuous(type = "viridis", limits=c(2,2000), na.value = "white") +
theme_minimal()ggplot(sample_points_wide, aes(NDWI, SWIR_ratio)) +
geom_bin_2d(bins = 200) +
geom_point(data=endmembers, aes(NDWI, SWIR_ratio, color =class), size=2, alpha=0.9) +
scale_color_manual(values=c("peru", "olivedrab", "darkred", "steelblue")) +
geom_polygon(data=endmember_endpoints[-c(1),], aes(NDWI, SWIR_ratio), color ="red", fill="red", alpha=0.1) +
geom_polygon(data=endmember_endpoints[-c(2),], aes(NDWI, SWIR_ratio), color ="red", fill="red", alpha=0.1) +
geom_polygon(data=endmember_endpoints[-c(3),], aes(NDWI, SWIR_ratio), color ="red", fill="red", alpha=0.1) +
geom_polygon(data=endmember_endpoints[-c(4),], aes(NDWI, SWIR_ratio), color ="red", fill="red", alpha=0.1) +
scale_fill_continuous(type = "viridis", limits=c(2,2000), na.value = "white") +
theme_minimal()ggplot(sample_points_wide, aes(TCW, SWIR_ratio)) +
geom_bin_2d(bins = 200) +
geom_point(data=endmembers, aes(TCW, SWIR_ratio, color =class), size=2, alpha=0.9) +
scale_color_manual(values=c("peru", "olivedrab", "darkred", "steelblue")) +
# geom_polygon(data=endmember_endpoints[-c(1),], aes(TCW, SWIR_ratio), color ="red", fill="red", alpha=0.1) +
# geom_polygon(data=endmember_endpoints[-c(2),], aes(TCW, SWIR_ratio), color ="red", fill="red", alpha=0.1) +
# geom_polygon(data=endmember_endpoints[-c(3),], aes(TCW, SWIR_ratio), color ="red", fill="red", alpha=0.1) +
# geom_polygon(data=endmember_endpoints[-c(4),], aes(TCW, SWIR_ratio), color ="red", fill="red", alpha=0.1) +
scale_fill_continuous(type = "viridis", limits=c(2,2000), na.value = "white") +
theme_minimal()ggplot(sample_points_wide, aes(NDVI, SWIR_ratio)) +
geom_bin_2d(bins = 80) +
geom_point(data=subset(endmembers, select = -c(year,month)), aes(NDVI, SWIR_ratio, color =class), size=1.5) +
scale_color_manual(values=c("peru", "olivedrab", "darkred", "steelblue")) +
scale_fill_continuous(type = "viridis", limits=c(3,1500), na.value = "white") +
theme_minimal() +
theme(legend.position = 'bottom') +
facet_wrap(~month)ggplot(sample_points_wide, aes(NDVI, SWIR_ratio)) +
geom_bin_2d(bins = 80) +
geom_point(data=subset(endmembers, select = -c(year,month)), aes(NDVI, SWIR_ratio, color =class), size=1.5) +
scale_color_manual(values=c("peru", "olivedrab", "darkred", "steelblue")) +
scale_fill_continuous(type = "viridis", limits=c(3,1500), na.value = "white") +
theme_minimal() +
theme(legend.position = 'bottom') +
facet_wrap(~year)ggplot(sample_points_wide, aes(NDVI, SWIR_ratio)) +
geom_bin_2d(bins = 80) +
geom_point(data=subset(endmembers, select = -c(year,month)), aes(NDVI, SWIR_ratio, color =class), size=1.5) +
scale_color_manual(values=c("peru", "olivedrab", "darkred", "steelblue")) +
scale_fill_continuous(type = "viridis", limits=c(3,1500), na.value = "white") +
theme_minimal() +
theme(legend.position = 'bottom') +
facet_grid(rows= vars(year), cols = vars(month))ggplot(sample_points_wide, aes(NDVI, SWIR_ratio)) +
geom_bin_2d(bins = 80) +
geom_point(data=subset(endmembers, select = -c(year,month, Tile_ID)), aes(NDVI, SWIR_ratio, color =class), size=1.5) +
scale_color_manual(values=c("peru", "olivedrab", "darkred", "steelblue")) +
scale_fill_continuous(type = "viridis", limits=c(1,1500), na.value = "white") +
theme_minimal() +
theme(legend.position = 'bottom') +
facet_wrap(~Tile_ID)Side_View <- ggplot(slice_sample(sample_points_wide, n = 2500), aes(NDVI, SWIR_ratio, color=MNDWI)) +
geom_point() +
#geom_point(data=endmembers[!endmembers$class=="water",], aes(NDVI, SWIR_ratio, shape=class), color ="red") +
geom_polygon(data=endmember_endpoints[-c(1),], aes(NDVI, SWIR_ratio), color ="red", fill="red", alpha=0.1) +
geom_polygon(data=endmember_endpoints[-c(2),], aes(NDVI, SWIR_ratio), color ="red", fill="red", alpha=0.1) +
geom_polygon(data=endmember_endpoints[-c(3),], aes(NDVI, SWIR_ratio), color ="red", fill="red", alpha=0.1) +
geom_polygon(data=endmember_endpoints[-c(4),], aes(NDVI, SWIR_ratio), color ="red", fill="red", alpha=0.1) +
scale_fill_continuous(type = "viridis", limits=c(2,2000), na.value = "white") +
theme_minimal()
ggMarginal(Side_View, type = "histogram")
ggMarginal(Side_View, type = "densigram", fill="grey")library(plotly)
# sample_points_wide %>%
# plot_ly(x=~NDVI, y=~MNDWI, z=~SWIR_ratio, type="scatter3d", mode="markers", color=~NDVI) |>
# add_trace(data =endmembers, x=~NDVI, y=~MNDWI, z=~SWIR_ratio, name = 'endmembers',
# type = 'scatter3d', opacity = 1, color="red")
sample_points_wide %>%
slice_sample(n = 1) |>
plot_ly(
x = ~ NDVI,
y = ~ TCW,
z = ~ SWIR_ratio,
type = "scatter3d",
mode = "markers",
color = ~ NDVI,
opacity = 0.7,
size = 0.5,
hovertemplate = paste(
'NDVI = %{x}',
'<br>SWIR_ratio = %{z} <br>',
'TCW = %{y}<br>',
'UID = %{text}'
),
text = ~ UID
) |>
add_trace(
data = endmembers[!duplicated(endmembers$class), ],
#data = sample_points_wide[sample_points_wide$UID %in% c(6074684, 3386315, 4649660, 1147114),],
x = ~ NDVI,
y = ~ TCW,
z = ~ SWIR_ratio,
name = 'endmembers',
intensity = ~NDVI,
colorscale = list(c(0,'grey'),
c(0.45,'grey'),
c(0.5, 'grey'),
c(1, 'grey')),
type = "mesh3d"
)
# sample_points_wide %>%
# plot_ly(x=~NDVI, y=~SWIR_ratio, z=~MNDWI, type="scatter3d", mode="markers", color=~NDVI, opacity=0.7) |>
# add_trace(data =endmembers[!duplicated(endmembers$class),], x=~NDVI, y=~SWIR_ratio, z=~MNDWI, name = 'endmembers', type="mesh3d")
sample_points_wide %>%
plot_ly(x=~MNDWI, y=~NDVI, z=~SWIR_ratio, type="scatter3d", mode="markers", color=~NDVI, opacity=0.7) |>
add_trace(data =endmembers[!duplicated(endmembers$class),], x=~MNDWI, y=~NDVI, z=~SWIR_ratio, name = 'endmembers', type="mesh3d")
# sample_points_wide %>%
# plot_ly(x=~MNDWI, y=~SWIR_ratio, z=~NDVI, type="scatter3d", mode="markers", color=~NDVI, opacity=0.7) |>
# add_trace(data =endmembers[!duplicated(endmembers$class),], x=~MNDWI, y=~SWIR_ratio, z=~NDVI, name = 'endmembers', type="mesh3d")
sample_points_wide %>%
plot_ly(x=~SWIR_ratio, y=~MNDWI, z=~NDVI, type="scatter3d", mode="markers", color=~NDVI, opacity=0.7) |>
add_trace(data =endmembers[!duplicated(endmembers$class),], x=~SWIR_ratio, y=~MNDWI, z=~NDVI, name = 'endmembers', type="mesh3d")
# sample_points_wide %>%
# plot_ly(x=~SWIR_ratio, y=~NDVI, z=~MNDWI, type="scatter3d", mode="markers", color=~NDVI, opacity=0.7) |>
# add_trace(data =endmembers[!duplicated(endmembers$class),],x=~SWIR_ratio, y=~NDVI, z=~MNDWI, name = 'endmembers', type="mesh3d")xy <- endmembers |> select(x,y)
library(sp)
endmembers_vect <- SpatialPointsDataFrame(coords = xy,
data = subset( endmembers),
proj4string = CRS("+proj=laea +lat_0=52 +lon_0=10 +x_0=4321000 +y_0=3210000 +ellps=GRS80 +towgs84=0,0,0,0,0,0,0 +units=m +no_defs +type=crs")) |>
vect()
#project( "epsg:3857")
#project( "epsg:4326")
plot(endmembers_vect, "class", col =c("peru", "olivedrab", "darkred", "steelblue"))
writeVector(endmembers_vect,
"/data/Dagobah/greengrass/schnesha/thesis/feature_space/test_endmembers.gpkg",
filetype="GPKG",
options = NULL,
overwrite=TRUE)endmembers_txt <- endmembers |>
#mutate(class = as.numeric(as.factor(class))) |>
#filter(Tile_ID %in% c("X0070_Y0038","X0070_Y0039")) |>
select(x, y, class)
endmembers_txt |> group_by(class) |> tally()
endmembers_txt <- SpatialPointsDataFrame(coords = select(endmembers_txt, x, y) ,
data = subset(endmembers_txt),
proj4string = CRS("+proj=laea +lat_0=52 +lon_0=10 +x_0=4321000 +y_0=3210000 +ellps=GRS80 +towgs84=0,0,0,0,0,0,0 +units=m +no_defs +type=crs")) |>
vect() |>
as.data.frame()
write.table(endmembers_txt, "/data/Dagobah/greengrass/schnesha/thesis/04_fraction_mapping_ts/02_samples/endmembers.txt", quote = FALSE, row.names = FALSE, col.names = FALSE)
write.table(endmembers_txt, "/data/Dagobah/greengrass/schnesha/thesis/04_fraction_mapping_ts_indicies/02_samples/endmembers.txt", quote = FALSE, row.names = FALSE, col.names = FALSE)coordinates_txt <- endmembers |>
#mutate(class = as.numeric(as.factor(class))) |>
#filter(Tile_ID %in% c("X0070_Y0038","X0070_Y0039")) |>
select(x, y) |>
mutate(x = round(x, 6),
y = round(y, 6))
write.table(coordinates_txt, "/data/Dagobah/greengrass/schnesha/thesis/04_fraction_mapping_ts/02_samples/coordinates.txt", quote = FALSE, row.names = FALSE, col.names = FALSE)
write.table(coordinates_txt, "/data/Dagobah/greengrass/schnesha/thesis/04_fraction_mapping_ts_indicies/02_samples/coordinates.txt", quote = FALSE, row.names = FALSE, col.names = FALSE)features_txt <- endmembers |>
#mutate(class = as.numeric(as.factor(class))) |>
# filter(Tile_ID %in% c("X0070_Y0038","X0070_Y0039")) |>
select(BLU, GRN, RED, NIR,
RE1, RE2, RE3, SW1, SW2,
#NDVI
) |>
mutate(#NDVI = NDVI*100,
across(everything(), ~ .x *100))
write.table(features_txt, "/data/Dagobah/greengrass/schnesha/thesis/04_fraction_mapping_ts/02_samples/features.txt", quote = FALSE, row.names = FALSE, col.names = FALSE)
########################
### with all indicies###
########################
features_txt <- endmembers |>
#mutate(class = as.numeric(as.factor(class))) |>
# filter(Tile_ID %in% c("X0070_Y0038","X0070_Y0039")) |>
select(BLU, GRN, RED, NIR,
RE1, RE2, RE3, SW1, SW2,
NDVI, MNDWI, NDWI, TCW
) |>
mutate(across(NDVI:NDWI, ~ .x *100), #apply an extra 10 to get to 1000
across(everything(), ~ .x *100),
across(everything(), ~ as.integer(.x)))
write.table(features_txt, "/data/Dagobah/greengrass/schnesha/thesis/04_fraction_mapping_ts_indicies/02_samples/features.txt", quote = FALSE, row.names = FALSE, col.names = FALSE)response_txt <- endmembers |>
mutate(class = as.numeric(as.factor(class))) |>
#filter(Tile_ID %in% c("X0070_Y0038","X0070_Y0039")) |>
select(class)
write.table(response_txt, "/data/Dagobah/greengrass/schnesha/thesis/04_fraction_mapping_ts/02_samples/response.txt", quote = FALSE, row.names = FALSE, col.names = FALSE)
write.table(response_txt, "/data/Dagobah/greengrass/schnesha/thesis/04_fraction_mapping_ts_indicies/02_samples/response.txt", quote = FALSE, row.names = FALSE, col.names = FALSE)Used for magic parameters
### Get list of all dates in time series
tile_folders <- list.dirs("/data/Dagobah/greengrass/schnesha/thesis/04_fraction_mapping_ts/01_TS_S2_17-22", full.names = T)
tile_folders <- tile_folders[grepl("X", tile_folders)]
dates <- c()
for (i in 1:length(tile_folders)) {
tile_dates <- list.files(tile_folders[i], full.names = T) |>
str_sub(-18,-5)
dates <- dates |>
append(tile_dates[grepl("SEN2", tile_dates)])
# remove potentially corrupted dates
dates <- dates[!grepl("tif", dates)]
}
dates <- unique(dates)
write(dates,
"/data/Dagobah/greengrass/schnesha/thesis/04_fraction_mapping_ts/prm_files/observation_dates_magic_prm.txt",
sep = " ",
ncolumns=length(dates)
)# starting force from a dokcer image str
dforce = 'docker run -e FORCE_CREDENTIALS=/app/credentials -e BOTO_CONFIG=/app/credentials/.boto -v $HOME:/app/credentials -v /data:/data -v /mnt:/mnt -v $HOME:/home/schnesha -w $PWD -u $(id -u):$(id -g) -t --rm davidfrantz/force:latest'
ml_models <- list.files("/data/Dagobah/greengrass/schnesha/thesis/04_fraction_mapping_ts/prm_files/05_ml_predict_prm", full.names = T)
tile_folders <- list.dirs("/data/Dagobah/greengrass/schnesha/thesis/04_fraction_mapping_ts/05_predictions", full.names = T)
tile_folders <- tile_folders[grepl("X", tile_folders)]
# LINUX command
#for file in /data/Dagobah/greengrass/schnesha/thesis/04_fraction_mapping_ts/prm_files/05_ml_predict_prm/*; do dforce force-higher-level "$file"; done
setwd("/data/Dagobah/greengrass/schnesha/thesis/04_fraction_mapping_ts/prm_files")
if (length(dates) == length(ml_models)) {
for (i in 1:length(ml_models)) {
cmd <- paste(dforce, "force-higher-level", ml_models[i])
system(cmd, wait = TRUE)
#Sys.sleep(2)
# for (j in 1:length(tile_folders)) {
# if (length(list.files(tile_folders[j], pattern = 'HL', full.names = T))==0) next
#
# list.files(tile_folders[j],
# pattern = 'HL',
# full.names = T)
#
# unmaned_files <-
# list.files(tile_folders[j],
# pattern = 'HL',
# full.names = T)
#
# file.rename(unmaned_files,
# str_replace(
# unmaned_files,
# pattern = paste0('HL_ML_', c("MLI", "MLP", "MLU"), ".tif"),
# paste0("ML_", c("MLI", "MLP", "MLU"), "_", dates[i], ".tif")
# ))
# }
#print(paste(dates[i], "is done"))
}
}# import updatede wetland mask
wetland_mask <- rast("/data/Dagobah/greengrass/schnesha/thesis/wetland_masks/wetland_mask_v2.tif") #|> crop(ext(4546026, 4676026, 3334920, 3524920))
# There are no methods for negative buffering of rasters in R. This is a workaround that inverts the wetland mask and using an ifelse inverts na values and buffers into the valid raster pixels
wetland_mask <- !buffer(ifel(is.na(wetland_mask), 1, NA),
width=50)
# Replace false values with NA so that they can me omitted in the sampling process
wetland_mask <- subst(wetland_mask, FALSE, NA)
# Sample points on a 200x200 m regular grid
samples <- spatSample(wetland_mask,
size = size(wetland_mask)/5000, # get total size and divide to reduce
method = "regular",
as.points =T,
exhaustive=T, na.rm=TRUE)
plot(samples)
grid <- read_sf("/data/Dagobah/dc/deu/vector/datacube-grid_DEU_10km.gpkg") |>
st_transform(crs = st_crs(samples)) |>
vect()
plot(grid)
regular <- terra::intersect(grid, samples) |>
select(-Tile_X, -Tile_Y, -wetland_mask) |>
mutate(ID = row_number())
plot(regular)# create list of all tile folders
tile_folders <- list.dirs("/data/Dagobah/greengrass/schnesha/thesis/04_fraction_mapping_ts/05_predictions", full.names = T)
tile_folders <- tile_folders[grepl("X", tile_folders)]
FC_df_wide <- c()
for (i in 1:length(tile_folders)) {
# list bands in tile (with full path) and stack
band_files <- list.files(tile_folders[i], full.names = T)
band_files <- band_files[!grepl("aux.xml", band_files)]
# read in all band for the tile as a raster stack
FC_stack <- rast(band_files)
# get the number of layers (per band)
nlayers = nlyr(FC_stack)/4
# add unique name to each band
names(FC_stack) <- paste0( rep(str_sub(band_files, 103, 110),each=4), "_", c("NPV", "PV", "soil", "water"))
# current tile
current_tile <- band_files[1] |> str_sub(80,90)
# subset sample points to current tile
filtered_samples <- regular |>
filter(Tile_ID == current_tile)
# extract S2 for sample points and convert to df
sample_points <- terra::extract(FC_stack, filtered_samples, xy=T, bind=T, ID=T) |>
as.data.frame()
# bind to total df
FC_df_wide <- plyr::rbind.fill(FC_df_wide, as.data.frame(sample_points))
# Progress notifier
print(paste("tile", current_tile, "done", "(", i, "out of ", length(tile_folders), ")"))
}# Full data set
#saveRDS(FC_df_wide, "/data/Dagobah/greengrass/schnesha/thesis/feature_space/FC_df_wide.rds")
FC_df_wide <- readRDS( file = "/data/Dagobah/greengrass/schnesha/thesis/feature_space/FC_df_wide.rds") FC_ts_point <- FC_df_wide |>
select(x, y, Tile_ID, ID)
xy <- FC_ts_point |> select(x,y)
library(sp)
FC_ts_point <- SpatialPointsDataFrame(coords = xy,
data = subset( FC_ts_point),
proj4string = CRS("+proj=laea +lat_0=52 +lon_0=10 +x_0=4321000 +y_0=3210000 +ellps=GRS80 +towgs84=0,0,0,0,0,0,0 +units=m +no_defs +type=crs")) |>
vect()
plot(FC_ts_point)
writeVector(FC_ts_point,
"/data/Dagobah/greengrass/schnesha/thesis/feature_space/FC_ts_points.gpkg",
filetype="GPKG",
options = NULL,
overwrite=TRUE)FC_df <- FC_df_wide %>%
pivot_longer(-c(x, y, Tile_ID, ID), names_to="date") %>%
na.omit() %>%
# slice_sample(n=100000) %>%
mutate(class = str_sub(date, 10,-1),
date = as.Date(str_sub(date, 1,8), "%Y%m%d"),
month = lubridate::month(date),
year = lubridate::year(date),
month_year = paste0(month,"_",year)) |>
rename(fractional_cover = value) |>
slice_sample(n=10000000)ggplot(FC_df, aes(as.factor(year), fractional_cover, group=year, fill=year)) +
geom_boxplot() +
scale_fill_continuous(type = "viridis") +
theme_minimal() ggplot(FC_df, aes(month, fractional_cover, group=month, fill=month)) +
geom_boxplot(notch=T) +
scale_fill_continuous(type = "viridis") +
theme_minimal() +
facet_wrap(~year)ggplot(FC_df, aes(month, fractional_cover, group=month, fill=month)) +
geom_violin() +
scale_fill_continuous(type = "viridis") +
theme_minimal() +
facet_wrap(~year)ggplot(FC_df, aes(date, fractional_cover/100)) +
#stat_summary(FC_df, mapping= aes(date, fractional_cover/100, group = interaction(class, Tile_ID)), fun=mean, geom="line", color="grey", alpha=0.5) +
stat_summary(aes(group=class, color=class), fun=mean, geom="line", linewidth = 1.5) + # draw a mean line in the data
stat_summary(aes(group=class, color=class), fun=mean, geom="point", linewidth = 1.5) +
#geom_line(FC_df, mapping= aes(date, fractional_cover/100, group = Tile_ID), alpha=0.8) +
scale_color_manual(values=c("peru", "olivedrab", "darkred", "steelblue")) + theme_minimal() +
ylab("Fractional cover (%)") +
facet_wrap(~year, scales="free_x")## Warning in stat_summary(aes(group = class, color = class), fun = mean, geom =
## "point", : Ignoring unknown parameters: `linewidth`
FC_df |> filter(Tile_ID %in% c("X0068_Y0038","X0068_Y0042", "X0068_Y0039", "X0070_Y0038", "X0071_Y0039", "X0071_Y0045" )) %>%
ggplot( aes(date, fractional_cover/100)) +
stat_summary(mapping= aes(date, fractional_cover/100, group = interaction(class, Tile_ID)), fun=mean, geom="line", color="grey", alpha=0.5) +
stat_summary(aes(group=class, color=class), fun=mean, geom="line", linewidth = 1.5) + # draw a mean line in the data
stat_summary(aes(group=class, color=class), fun=mean, geom="point", linewidth = 1.5) +
#geom_line(FC_df, mapping= aes(date, fractional_cover/100, group = Tile_ID), alpha=0.8) +
scale_color_manual(values=c("peru", "olivedrab", "darkred", "steelblue")) + theme_minimal() +
ylab("Fractional cover (%)") +
facet_grid(cols=vars(year), rows = vars(Tile_ID), scales="free_x")## Warning in stat_summary(aes(group = class, color = class), fun = mean, geom =
## "point", : Ignoring unknown parameters: `linewidth`
ggplot(FC_df, aes(date, fractional_cover/100, color = class)) +
geom_smooth() +
scale_color_manual(values=c("peru", "olivedrab", "darkred", "steelblue")) +
theme_minimal() +
ylab("Fractional cover (%)") +
facet_wrap(~year, scales="free_x")## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
FC_df |>
filter(Tile_ID %in% c("X0070_Y0038")) %>%
ggplot(aes(date, fractional_cover/100, color=class)) +
# geom_point() +
geom_smooth() +
# geom_line(mapping= aes(date, fractional_cover/100, group = interaction(class, ID)), alpha=0.8) +
scale_color_manual(values=c("peru", "olivedrab", "darkred", "steelblue")) +
theme_minimal() +
ylab("Fractional cover (%)") +
facet_wrap(~year, scales="free")## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
FC_df |>
filter(ID %in% c(20:20)) %>%
ggplot(aes(date, fractional_cover/100, color=class)) +
geom_point() +
geom_line(mapping= aes(date, fractional_cover/100, group = interaction(class, ID)), alpha=0.8) +
scale_color_manual(values=c("peru", "olivedrab", "darkred", "steelblue")) +
theme_minimal() +
ylab("Fractional cover (%)") +
facet_wrap(~year, scales="free")drought_index_df <- drought_index_df |>
mutate(month = as.numeric(month),
year = as.numeric(year)) |>
rename(drought_index=value) |>
select(-date)
FC_drought <- FC_df |>
filter(class=="PV") |>
group_by(month_year, ID) |>
mutate(fractional_cover= mean(fractional_cover)) |>
left_join(drought_index_df, by = c("month", "year")) |>
ungroup()
library(lme4)
# prep data frame
# merge
# model
ggplot(slice_sample(FC_drought, n=100000), aes(drought_index, fractional_cover)) +
#geom_point()+
geom_smooth() #+facet_wrap(~month)
m_PV_drougt = lmer(fractional_cover ~ drought_index + (1|Tile_ID) + (1|month_year), #+ (1|x) + (1|y)
data = slice_sample(FC_drought, n=10000))
summary(m_PV_drougt)
plot(m_PV_drougt)
qqnorm(resid(m_PV_drougt))
qqline(resid(m_PV_drougt))
ggstatsplot::plot_model(m_PV_drougt, type = "re", show.values = TRUE, vline.color = "black")
ggstatsplot::ggcoefstats(x = m_PV_drougt, conf.int = TRUE,
conf.level = 0.95, conf.method = "HPDinterval", exclude.intercept = T, stats.labels =F) predictions <- list.files(tile_folders,
pattern = 'MLP',
full.names = T)
predictions <- predictions[!grepl("aux.xml", predictions)]
predictions <- rast(predictions)
# add date to name of layer
names(predictions) <- paste0(names(predictions), "_", str_sub(sources(predictions), 103, 110) )
# subsample to reduce size
#predictions <- spatSample(predictions, 1000000, method="random", as.raster=T)
# extract each layer and rename with class type
PV = predictions[[grepl("CLASS_001", names(predictions))]]
names(PV) = paste0(str_extract(names(PV),"(?<=_)[^_]+$"), "_PV")
NPV = predictions[[grepl("CLASS_002", names(predictions))]]
names(NPV) = paste0(str_extract(names(NPV),"(?<=_)[^_]+$"), "_NPV")
soil = predictions[[grepl("CLASS_003", names(predictions))]]
names(soil) = paste0(str_extract(names(soil),"(?<=_)[^_]+$"), "_soil")
water = predictions[[grepl("CLASS_004", names(predictions))]]
names(water) = paste0(str_extract(names(water),"(?<=_)[^_]+$"), "_water")
# restock corretly names layers
FC_stack <- c(PV, NPV, soil, water)
plot(FC_stack)# FC_df <- as.data.frame(FC_stack) %>%
# pivot_longer(everything(), names_to="date") %>%
# na.omit() %>%
# slice_sample(n=100000) %>%
# mutate(class = str_sub(date, 10,-1),
# date = as.Date(str_sub(date, 1,8), "%Y%m%d"),
# month = lubridate::month(date),
# year = lubridate::year(date),
# month_year = paste0(month,"_",year))In oder to to attempt to better understand the inter class differences in PV and water I attempted to you a k-means algorithm subdived groups and potentially aid in visual identification of trends in QGIS.
The outcome of this visual interpretation is that PV seems to subdevided into three chunks, correspoming with high NDVI (mostly dense forest & some crops), mid NDVI (less dense forest & crops), low NDVI (pixels with light tree cover & some less intensive crops & “real” wetland PV)
When subdeviding the water feature space using the ploly data plotting tool it was not possible to find specific trends in the feature space. Furthemore the Kmeans clustering did not differentiate water into clear subgroups and catagories points into 2 classes, differentiated by being slightly more/less bright. In QGIS there were no differences in distribution of location of these classes.
library(factoextra)
library(cluster)kmeans_data <- na.omit(endmembers_loose) %>%
select(-x,-y) |>
select(c(9:19,21, 22,23)) |>
scale() |>
as.data.frame() #|> slice_sample(n=50000)#perform k-means clustering with k = 6 clusters
km <- kmeans(kmeans_data, centers = 6, nstart = 25)
#plot results of final k-means model
fviz_cluster(km, data = kmeans_data)foo <- endmembers_loose# |> distinct(ID, .keep_all = T)
#add cluster assigment to original data
final_data <- cbind(foo, cluster = km$cluster) |>
mutate(cluster = as.factor(cluster))ggplot(final_data, aes(cluster, fill=cluster)) +
scale_color_brewer(palette = "Dark2") +
geom_bar()ggplot(final_data, aes(NDVI, SWIR_ratio, color=cluster)) +
geom_point(alpha=0.8) +
#geom_point(data=reference_spectra, aes(NDVI, SWIR_ratio), color ="red") +
scale_color_brewer(palette = "Dark2") +
theme_minimal() +
# scale_x_continuous(expand = c(0, 0), limits = c(-0.2, 1)) +
scale_y_continuous(expand = c(0, 0), limits = c(0, 1.2))cluster_long <- final_data |>
pivot_longer(cols = c(9:19,21, 22,23), names_to= "wavelength", values_to = "reflectance") |>
mutate(cluster = as.character(cluster))
cluster_long$cluster[cluster_long$cluster==1] = "PV_low_NDVI"
cluster_long$cluster[cluster_long$cluster==2] = "PV_soil/NPV"
cluster_long$cluster[cluster_long$cluster==3] = "PV_high_SWIR"
cluster_long$cluster[cluster_long$cluster==4] = "PV_mid_NDVI"
cluster_long$cluster[cluster_long$cluster==5] = "water"
cluster_long$cluster[cluster_long$cluster==6] = "PV_high_NDVI"
ggplot(cluster_long, aes(x=reflectance, fill=cluster)) +
geom_density(alpha=0.9) +
#scale_color_discrete(type = "viridis") +
#scale_fill_discrete(type = "viridis") +
scale_fill_brewer(palette = "Dark2") +
theme_minimal() +
facet_wrap(vars(wavelength), scales = "free") xy <- cluster_long |> select(x,y)
library(sp)
library(leaflet)
clustered_vect <- SpatialPointsDataFrame(coords = xy,
data = cluster_long,
proj4string = CRS("+proj=laea +lat_0=52 +lon_0=10 +x_0=4321000 +y_0=3210000 +ellps=GRS80 +towgs84=0,0,0,0,0,0,0 +units=m +no_defs +type=crs")) |>
terra::vect() |>
#project( "epsg:3857")
terra::project("epsg:4326")
writeVector(clustered_vect,
"/data/Dagobah/greengrass/schnesha/thesis/feature_space/test_endmembers_kmeans.gpkg",
filetype="GPKG",
options = NULL,
overwrite=TRUE)
cluster_color <- colorFactor(palette = 'Dark2', clustered_vect$cluster)
clustered_vect |>
filter(cluster %in% c("PV_high_NDVI", "PV_mid_NDVI", "PV_low_NDVI")) |>
leaflet() %>%
addCircleMarkers(popup = ~as.character(id), radius= 6, label = ~as.character(id), stroke = FALSE, color= ~cluster_color(cluster),
fillOpacity = 1) |> addTiles() library(factoextra)
library(cluster)kmeans_data <- na.omit(sample_points_wide) %>%
select(-x,-y) |>
select(c(9:23)) |>
scale() |>
as.data.frame() #|> slice_sample(n=50000)#perform k-means clustering with k = 6 clusters
km <- kmeans(kmeans_data, centers = 4, nstart = 25)
#plot results of final k-means model
fviz_cluster(km, data = kmeans_data)foo <- sample_points_wide# |> distinct(ID, .keep_all = T)
#add cluster assigment to original data
final_data <- cbind(foo, cluster = km$cluster) |>
mutate(cluster = as.factor(cluster))ggplot(final_data, aes(cluster, fill=cluster)) +
scale_color_brewer(palette = "Dark2") +
geom_bar()ggplot(final_data, aes(NDVI, SWIR_ratio, color=cluster)) +
geom_point(alpha=0.8) +
#geom_point(data=reference_spectra, aes(NDVI, SWIR_ratio), color ="red") +
scale_color_brewer(palette = "Dark2") +
theme_minimal() +
# scale_x_continuous(expand = c(0, 0), limits = c(-0.2, 1)) +
scale_y_continuous(expand = c(0, 0), limits = c(0, 1.2)) xy <- final_data |> select(x,y)
library(sp)
library(leaflet)
clustered_vect <- SpatialPointsDataFrame(coords = xy, data = final_data,
proj4string = CRS("+proj=laea +lat_0=52 +lon_0=10 +x_0=4321000 +y_0=3210000 +ellps=GRS80 +towgs84=0,0,0,0,0,0,0 +units=m +no_defs +type=crs")) |>
vect() |>
#project( "epsg:3857")
project( "epsg:4326")
cluster_color <- colorFactor(palette = 'Dark2', clustered_vect$cluster)
leaflet(data = clustered_vect) %>%
addCircleMarkers(popup = ~as.character(id), radius= 6, label = ~as.character(id), stroke = FALSE, color= ~cluster_color(cluster),
fillOpacity = 1) |> addTiles() #calculate gap statistic based on number of clusters
km <- kmeans_data |>
select(NDVI, SWIR_ratio, MNDWI) |>
#perform k-means clustering with k = 6 clusters
kmeans(centers = 4, nstart = 25)
#plot results of final k-means model
fviz_cluster(km, data = kmeans_data)
foo <- sample_points_wide
#add cluster assigment to original data
final_data <- cbind(foo, cluster = km$cluster) |>
mutate(cluster = as.factor(cluster))ggplot(final_data, aes(cluster, fill=cluster)) +
scale_color_brewer(palette = "Dark2") +
geom_bar()ggplot(final_data, aes(NDVI, SWIR_ratio, color=cluster)) +
geom_point(alpha=0.8) +
#geom_point(data=reference_spectra, aes(NDVI, SWIR_ratio), color ="red") +
scale_color_brewer(palette = "Dark2") +
theme_minimal() +
# scale_x_continuous(expand = c(0, 0), limits = c(-0.2, 1)) +
scale_y_continuous(expand = c(0, 0), limits = c(0, 1.2))